Salmon LHT data for DIASPARA 2.2

Author

Viktor Thunell

Published

May 2, 2025

Load libraries

Show code
# Load libraries, install if needed
pkgs <- c("tidyverse", "tidylog", "devtools", "viridis", "nls.multstart",
          "broom", "patchwork", "sdmTMB", "stringi")

# Work out once which of the required packages are not installed yet
missing.pkgs <- setdiff(pkgs, rownames(installed.packages()))

if (length(missing.pkgs) > 0) {
  install.packages(missing.pkgs, dependencies = TRUE)
}

# Attach all packages; invisible() hides the list that lapply() returns
invisible(lapply(pkgs, library, character.only = TRUE))
# NOTE(review): read_sf()/st_coordinates() (sf) and here::here() are used in
# this document but neither package is listed in `pkgs`; presumably they are
# already installed or attached by the sourced script below - confirm.

options(ggplot2.continuous.colour = "viridis")
#theme_set(theme_grey()) # check why global theme option not working when rendering
devtools::source_url("https://raw.githubusercontent.com/VThunell/diasp-lht/main/R/functions/map-plot.R") # message of SHA-1 hash for file

# Set path
home <- here::here()

1. Read data

1a. Read Length at age data

Four data sets build the length at age data:

  1. Sweden SLU database “Sötebasen”
    • At locations across Sweden on both the Baltic side and in the Western sea
    • From recreational and commercial catches and scientific surveys
    • We assume these are fish returning to spawn, but catches are sometimes coastal(!)
    • Coordinates of site (river or sometimes region, i.e. Baltic Sea or West coast). This is not the exact catch place.
  2. Finland back calculated growth data
    • Rod, trap and netting methods.
    • From river Tornionjoki with tributaries.
  3. Finland catch data
    • GEAR
    • From river Tornionjoki with tributaries
    • Individuals hatched in 1994 have been excluded as there are uncertainties about origin (wild or reared).
  4. France catch data
    • Recreational and commercial fishing and scientific trapping
    • from rivers throughout the French Atlantic coast

The data sets are read below and the variable names of interest (age, length, sex, year, site, origin) are standardized.

The data are filtered and cleaned in Section 3a when combining data.

Show code
# Sweden SLU database "Sötebasen": read the individual records and
# standardise the variable names (no rows are removed here)
swe.sallaa <- read_delim(
  file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/LaxindividerSötebasen.csv",
  delim = ";",
  locale = locale(encoding = "latin1"),
  # these three columns are forced to text; they need to be fixed if used
  col_types = cols(
    Märkning2 = col_character(),
    Märke2Nr = col_character(),
    ÅlderLek3 = col_character()
  )
) %>%
  rename(
    age.sea = AdultÅlder,
    age.sm = SmoltÅlder,
    length = Längd,
    weight = Vikt,
    year = Årtal,
    origin = Ursprung,
    site = VattenNamn,
    stage = Stadium
  ) %>%
  mutate(
    country = "SWE",
    # any sex code other than "m"/"f" is treated as unknown
    sex = case_when(Kön %in% c("m", "f") ~ Kön),
    # translate the Swedish origin labels; other values are kept as they are
    origin = case_when(
      origin %in% "Odlad" ~ "reared",
      origin %in% "Vild" ~ "wild",
      .default = origin
    )
  )

# Finnish back-calculated growth data
fin.sallaa <- read_delim(
  file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Tornionjoki_growth measurements_1970s-.txt",
  delim = "\t",
  locale = locale(encoding = "latin1", decimal_mark = ",")
) %>%
  rename(
    age.sea = `SEA-AGE`,
    age.sm = `SMOLT AGE`,
    length = LENGTH,
    weight = WEIGHT,
    year = YEAR
  ) %>%
  mutate(
    # code 2 is assumed to be female (larger mean length); a missing code
    # stays NA and every other code becomes "m"
    sex = case_when(
      SEX == 2 ~ "f",
      is.na(SEX) ~ NA_character_,
      .default = "m"
    ),
    country = "FIN",
    origin = NA,
    site = "Tornionjoki"
  )

# Finnish catch data
fin.sallaa2 <- read_csv(
  file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Non-kelts, data-Table 1.csv"
) %>%
  rename(
    age.sea = `SEA-AGE`,
    age.sm = `SMOLT AGE`,
    length = `LENGTH mm`,
    weight = `WEIGHT grams`,
    year = YEAR,
    origin = `LIKELY ORIGIN (uusittu smolttidataa vast)`
  ) %>%
  mutate(
    # same sex coding as in the growth data: 2 -> "f", missing -> NA,
    # anything else -> "m"
    sex = case_when(
      SEX == 2 ~ "f",
      is.na(SEX) ~ NA_character_,
      .default = "m"
    ),
    country = "FIN",
    # likely origin: 1 is wild, 2-4 are reared, everything else
    # (5 == uncertain) becomes NA
    origin = case_when(
      origin == 1 ~ "wild",
      origin %in% c(2, 3, 4) ~ "reared",
      .default = NA
    ),
    site = "Tornionjoki"
  )

# France length at age
fra.sallaa <- read_csv2(
  file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/SAT_TailleAge_France2.csv",
  locale = locale(encoding = "latin1")
) %>%
  rename(
    age.sea = sea_age,
    age.sm = smolt_age
  ) %>%
  mutate(
    # parse the day-month-year hour:minute stamp; `truncated = 2` also
    # accepts stamps where the time part is missing
    date = dmy_hm(cam_date_heure_fin, truncated = 2),
    year = year(date),
    site = str_to_title(sita_nom),
    origin = NA,
    country = "FRA",
    # WGNAS stock unit
    stock.unit = "France"
  )
  
# str(swe.sallaa)
# str(fin.sallaa)
# str(fra.sallaa)

1b. Read Fecundity at length data

Six data sets build the fecundity length data:

  • Sweden 1 - Baltic Sea
    • Collected in river Umeälven (and tributary Vindelälven) and Dalälven
    • Total fecundity available (stripped + dissected). I.e. All eggs are counted!
    • Also trout data that is filtered out.
  • Sweden 2 - Swedish west coast
    • Collected in rearing station in river Göta älv
    • All fin-clipped individuals.
    • Only stripped (from what V.T. knows, methods description lacking atm)
    • Not total fecundity as in Sweden 1
  • Finland 1
    • Described in the report “HYDROACOUSTIC ASSESSMENT OF SALMON IN THE RIVER TORNIONJOKI - FINAL REPORT, EU STUDY PROJECT 96-069”
    • Total fecundity available (stripped + dissected). I.e. an estimation of all eggs!
    • From river Tornionjoki
    • Both reared (finclipped) and wild individuals (adipose fin intact) but the majority are wild.
  • Finland 2
    • From river Tornionjoki and Simojoki
    • wild, reared and NA mix of individuals
  • France
    • Described in the Samarch report “Changes in sex ratio and fecundity of salmonids” (M. Nevoux et al. 2020, Deliverable D3.3.1)
    • Methods for assessing fecundity is stripping and a subset of the volume (or weight) of eggs were counted, then the total fecundity was extrapolated based on the total volume (weight) of the stripped eggs (pers. com M. Nevoux)
    • From 13 rivers in three regions
    • Origin should mainly be wild (pers. com. M. Nevoux)

The data is filtered in Section 4 when combining data.

Show code
# Fecundity Sweden Baltic (the file also holds trout, which is dropped)
swe.salfec <- read_csv(
  file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Fekunditetsdata_Dal_Ume_20241213.csv"
) %>%
  filter(Species %in% "Salmon") %>%
  rename(
    n.eggs = `No eggs total`,
    length = `Length (cm)`,
    weight = `Weight before stripping (kg)`,
    origin = `Wild/Reared`,
    site = River
  ) %>%
  mutate(
    origin = tolower(origin),
    country = "SWE",
    year = as.numeric(Year),
    # kg to grams
    weight = weight * 1000,
    # egg counts contain spaces (e.g. as thousands separators); strip them
    # before converting to numeric
    n.eggs = as.numeric(str_replace_all(n.eggs, " ", "")),
    # cm to mm (missing lengths stay NA)
    length = length * 10
  )
        
# Fecundity Sweden Göta älv 2024
swe.salfec2 <- read_csv(
  file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/fecundity_Gotaalv_2024.csv"
) %>%
  rename(
    n.eggs = `ROM SKATTA ANTAL (ST)`,
    length = `Längd (cm)`,
    weight = `Vikt (kg)`
  ) %>%
  mutate(
    # adipose fin ("fettfena") recorded as "ej" -> reared; any other
    # recorded value -> wild; a missing value stays NA
    origin = case_when(
      fettfena == "ej" ~ "reared",
      !is.na(fettfena) ~ "wild"
    ),
    site = "Göta älv",
    country = "SWE",
    year = 2024,
    # kg to grams
    weight = weight * 1000,
    # cm to mm
    length = length * 10
  )

# Fecundity Finland 1996-1998
fin.salfec1 <- read_delim(file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Tornionjoki_1996-1998_fecundity.csv") %>%
  rename(n.eggs = `TOTAL FECUNDITY (EXCL. UNCERTAIN OOZYTES)`,
         length = LENGTH,
         weight = WEIGTH,
         year = YEAR
        ) %>%
  mutate(country = "FIN",
         site = "Tornionjoki",
         # Origin from the adipose fin code. Per the column header 1 = cut
         # (fin-clipped, i.e. reared) and 2 = intact (wild).
         # NOTE(review): code 0 is not explained in the header; it is
         # treated as "uncertain" - confirm. Any other code is kept as text.
         origin = case_when(`ADIPOSE FIN (1=CUT, 2=INTACT)` == "1" ~ "reared",
                            `ADIPOSE FIN (1=CUT, 2=INTACT)` == "2" ~ "wild",
                            `ADIPOSE FIN (1=CUT, 2=INTACT)` == "0" ~ "uncertain",
                            .default = as.character(`ADIPOSE FIN (1=CUT, 2=INTACT)`)))

# Fecundity Finland 2006: Tornionjoki (origin coded) stacked on top of
# Simojoki (origin unknown)
fin.salfec2 <- read_csv(
  file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Tornionjoki_fecundity_2006.csv"
) %>%
  rename(origin = `ORIGIN (1=WILD 2=REARED)`) %>%
  mutate(
    site = "Tornionjoki",
    origin = case_when(
      origin == "2" ~ "reared",
      origin == "1" ~ "wild",
      .default = NA
    )
  ) %>%
  bind_rows(
    read_csv(
      file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Simojoki_fecundity_2006.csv"
    ) %>%
      mutate(
        site = "Simojoki",
        origin = NA
      )
  ) %>%
  # the remaining columns are renamed after stacking the two files
  rename(
    length = `LENGTH (mm)`,
    n.eggs = `number eggs`
  ) %>%
  mutate(
    year = 2006,
    country = "FIN"
  )

# Fecundity France
fra.salfec <- read_csv(
  file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Fecondite_SAT_Bilan.csv"
) %>%
  rename(
    n.eggs = Fecondite,
    # fork (Lf) and total (Lt) length are kept as separate columns
    length.f = Lf,
    length.t = Lt,
    weight = Poids,
    year = Annee,
    # the "Origine" column is used as the site
    site = Origine
  ) %>%
  mutate(
    country = "FRA",
    origin = NA
  )

# str(swe.salfec)
# str(swe.salfec2)
# str(fin.salfec1)
# str(fin.salfec2)
# str(fra.salfec)

2. Spatial aggregations - Baltic Assessment and Atlantic Stock units and Regions

To add spatial information, the site name in the data is matched (using dplyr::left_join()) with the site name in the tables created below (SweFin.rivers and fra.rivers2).

The spatial information added is:

  • Coordinates (WGS84 DD). French coordinates were either provided by Hilaire Drouineau or added manually by V.T.; they are at the river mouth (entering the sea or from a tributary) or, for the Baie du Mont-Saint-Michel, in the center of the bay. These are not catch places. For many tributary rivers the main river mouth is used, and for those rivers the names are changed in 3a when combining data. Swedish coordinates either exist in the data or were added manually (river mouth, not catch place).
  • Stock unit (for WGNAS stock unit Sweden and France),
  • Assessment unit (for WGBAST),
  • Region and genetic region from French regions from Perrier et al. 2011 (doi: 10.1111/j.1365-294X.2011.05266.x), and provided by M. Nevoux, existing in the french fecundity data or added manually by V.T with input from M. Nevoux. Swedish regions are defined as Baltic Sea and Swedish west coast and Finnish is Baltic Sea.
Show code
# Assessment units (WGBAST) of the index rivers in Sweden and Finland,
# listed as site / assessment unit pairs: wild stocks first, then reared.
AU.rivers <- bind_rows(
  tribble(
    ~site,         ~asses.unit,
    "Tornionjoki", 1,
    "Simojoki",    1,
    "Kalixälven",  1,
    "Råneälven",   1,
    "Piteälven",   2,
    "Åbyälven",    2,
    "Byskeälven",  2,
    "Rickleån",    2,
    "Sävarån",     2,
    "Vindelälven", 2,
    "Öreälven",    2,
    "Lögdeälven",  2,
    "Ljungan",     3,
    "Mörrumsån",   4,
    "Emån",        4,
    "Kågeälven",   2,
    "Testeboån",   3,
    "Umeälven",    2,
    "Dalälven",    3,
    "Luleälven",   2,
    "Muonionjoki", 1
  ) %>%
    mutate(stock.origin = "wild"),
  tribble(
    ~site,                           ~asses.unit,
    "Torneälven_hatchery",           1,
    "Luleälven_(RG_with_Pite)",      2,
    "Iijoki",                        1,
    "Oulujoki",                      1,
    "Skellefteälven",                2,
    "Umeälven_(RG_with_Vindel)",     2,
    "Ångermanälven",                 3,
    "Indalsälven_(RG_with_Ljungan)", 3,
    "Ljusnan",                       3,
    "Dalälven_(RG_with_Testeboån)",  3,
    "Torneälven",                    1,
    "Gideälven",                     2
  ) %>%
    mutate(stock.origin = "reared")
)



# Swedish rivers entering the Western sea, i.e. WGNAS stock unit "Sweden".
# stock.origin is left unknown (NA) for all of them.
SU.rivers <- tibble(
  site = c(
    "Ätran", "Örekilsälven", "Göta älv", "Lagan",
    "Västerhavet (hela) ICES SD 20-21", "Genevadsån", "Fylleån", "Stensån"
  ),
  stock.unit = "Sweden",
  stock.origin = NA
)

# Site table for Sweden and Finland: coordinates from the Swedish Sötebasen
# records plus manually added Finnish rivers, with assessment unit (AU),
# stock unit (SU) and region attached.
SweFin.rivers <- swe.sallaa %>%
  drop_na(length) %>%
  distinct(site, WGS84_N_Vatten, WGS84_E_Vatten) %>%
  rename(lat = WGS84_N_Vatten,
         lon = WGS84_E_Vatten) %>%
  # the whole-Baltic "site" gets a manually chosen position
  mutate(lat = case_when(site == "Östersjön (hela) ICES SD 22-32" ~ 58.475309,
                         .default = lat),
         lon = case_when(site == "Östersjön (hela) ICES SD 22-32" ~ 19.780140,
                         .default = lon)) %>%
  # Finnish rivers, which are not in the Swedish database. Muonionjoki is
  # given the same position as Tornionjoki; Simojoki has its own position
  # (it previously received the Tornionjoki longitude because it was also
  # listed in the Tornionjoki branch).
  bind_rows(tibble(site = c("Tornionjoki", "Simojoki", "Muonionjoki"),
                   lat = c(65.879905, 65.625639, 65.879905),
                   lon = c(24.136424, 25.052169, 24.136424))) %>%
  left_join(AU.rivers, by = "site") %>%
  # SU.rivers also carries a (all-NA) stock.origin column, so both columns
  # are join keys; west-coast rivers have no AU match and therefore an NA
  # stock.origin, which matches the NA in SU.rivers
  left_join(SU.rivers, by = c("site", "stock.origin")) %>%
  # everything that is not in WGNAS stock unit "Sweden" is Baltic
  mutate(region = if_else(stock.unit == "Sweden", "Swedish.westcoast", "Baltic.sea", missing = "Baltic.sea" )) 

# French rivers by region and sub-region (table from Marie Nevoux)
fra.rivers <- read_csv(
  file = "/Users/vitl0001/Documents/Projects/DIASPARA/riviere_region_France.csv"
) %>%
  mutate(
    # title case so that the names match the sites in the catch data
    site = str_to_title(river),
    stock.unit = "France"
  )

# French site abbreviations for regional genotypic aggregations from
# Perrier et al. 2011
fra.rivabb <- read_delim(
  file = "/Users/vitl0001/Documents/Projects/DIASPARA/french_genotypes_Perrier.txt",
  delim = "\t"
) %>%
  rename(site = River) %>%
  mutate(
    # abbreviation = first three letters in upper case ...
    site.abb = str_to_upper(str_sub(site, start = 1, end = 3)),
    # ... except Nive and Nivelle, which would otherwise share "NIV"
    site.abb = case_when(
      site %in% "NIVE" ~ "NIE",
      site %in% "NIVELLE" ~ "NIL",
      .default = site.abb
    ),
    site = str_to_title(site),
    # genetic region of each abbreviated site
    region.gen = case_when(
      site.abb %in% c("COU", "TRI", "DOU", "LEG", "STE", "AUL", "GOY", "ELO",
                      "ELL", "PEN", "ODE", "AVE", "JET", "SCO", "BLA") ~ "Brittany",
      site.abb %in% c("ORN", "VIR", "SEI", "SAI", "SIE", "SEL", "SEE") ~ "Lower-Normandy",
      site.abb %in% c("NIL", "NIE", "GAV") ~ "Adour",
      site.abb %in% c("GAR", "DOR", "ALL") ~ "Allier-Gironde",
      site.abb %in% c("TOU", "VAL", "AUT", "CAN", "BRE", "ARQ") ~ "Upper-Normandy",
      .default = NA
    )
  )

# French site coordinates from Hilaire Drouineau
# Read as a spatial (sf) object; the geometry is turned into lon/lat columns
# with st_coordinates() when the French site information is combined.
# NOTE(review): presumably one point feature per river - confirm, since
# several features for the same river would give several rows per site.
fra.rivers.sf <- read_sf("/Users/vitl0001/Documents/Projects/DIASPARA/salmon_frarivers", stringsAsFactors = FALSE)

# Combine all French site info: regions (fra.rivers), genetic regions
# (fra.rivabb), manually added rivers and coordinates (fra.rivers.sf)
fra.rivers2  <- fra.rivers %>%
  full_join(fra.rivabb) %>%
  bind_rows(# adding missing rivers from fra.sallaa
            tibble(site = c("Isole", "Etel", "Quillec", "Horn", "St Laurent"), region.gen = "Brittany", stock.unit = "France"),
            tibble(site = c("Baie Du Mont Saint Michel","Thar"), region.gen = "Lower-Normandy", stock.unit = "France"),
            tibble(site = c("Loire"), region.gen = "Allier-Gironde", stock.unit = "France"),
            tibble(site = c("Durdent"), region.gen = "Upper-Normandy", stock.unit = "France")) %>% 
  # attach lon/lat taken from the geometry of the spatial site table
  left_join(st_coordinates(fra.rivers.sf) %>%
              as.data.frame() %>%
              rename(lon = X,
                     lat = Y) %>%
              bind_cols(st_set_geometry(fra.rivers.sf, NULL)) %>% rename(site = "river")) %>%
  # assign region.gen to those missing
  mutate(region.gen = case_when(site == "Oir" ~ "Lower-Normandy", 
                                subregion == "Adour" & is.na(site.abb) ~ "Adour",
                                subregion == "Gironde" & is.na(site.abb) ~ "Allier-Gironde",
                                region == "Bretagne" & is.na(site.abb) ~ "Brittany",
                                .default = region.gen)) %>%
  # Remove the " + Affl" entries (tributaries), "Gave Mauleon (Le Saison)"
  # (which is Gave Mauleon), the duplicate "Gave D'oloron" and "See Selune"
  # (See and Selune exist individually).
  filter(!site %in% c("See Selune","Odet + Affl","Elle + Affl","Gave Mauleon (Le Saison)", "Gave D'oloron") ) %>% 
  # Add (or override) lat and lon for these sites; one branch per site.
  # NOTE(review): the Durdent latitude (48.687806) is almost identical to the
  # Horn/Quillec values and looks like a copy-paste slip for a river in
  # Upper-Normandy - please verify.
  mutate(lat = case_when(site == "Baie Du Mont Saint Michel" ~ 48.655943,
                         site == "Valmont" ~ 49.761966,
                         site == "Seine" ~ 49.435474,
                         site == "Isole" ~ 47.874431,
                         site == "Loire" ~ 47.281585,
                         site == "Finistere" ~ 48.306467, 
                         site == "Etel" ~ 47.656579,
                         site == "Quillec" ~ 48.685033,
                         site == "Thar" ~ 48.800103,
                         site == "Horn" ~ 48.688119,
                         site == "Durdent" ~ 48.687806,
                         site == "Canche" ~ 50.527333,
                         site == "Couesnon" ~ 48.625250,
                         site == "St Laurent" ~ 47.903795,
                         .default = lat),
         lon = case_when(site == "Baie Du Mont Saint Michel" ~ -1.656370,
                         site == "Valmont" ~ 0.377126,
                         site == "Seine" ~ 0.285060,
                         site == "Isole" ~ -3.546855,
                         site == "Loire" ~ -2.152414,
                         site == "Finistere" ~ -4.080223,
                         site == "Etel" ~ -3.209520,
                         site == "Quillec" ~ -4.069429,
                         site == "Thar" ~ -1.568264,
                         site == "Horn" ~ -4.058391,
                         site == "Durdent" ~ 0.608712,
                         site == "Canche" ~ 1.614964,
                         site == "Couesnon" ~ -1.511461,
                         site == "St Laurent" ~ -3.945979,
                         .default = lon)) %>%
  # remove non-necessary info
  select(-subregion,-region,-id,-site.abb) 

3. Length at age

3a. Filter, clean and combine data

  • Keeping only adult individuals (removing NA and 0 sea age individuals).
  • Remove NA values in length and age.
  • Remove individuals from Swedish lakes
  • Remove three mark-recaptured individuals in Swedish data
  • Remove an obvious outlier in the French data
  • Correct an obvious outlier in the French data
  • Calculate total length (\(L_t\)) from fork length (\(L_f\)) where needed in the French data based on the model: \(exp(0.2892351)*L_f^{0.9623479}\) (see below).
  • The French data has two sources of sex identification (observed in the field vs genetic). I use genetic sex where available and complete these data with field observations. M. Nevoux considers field observations correct only from (mid) August. In the data where both the genetic and the field method are available, about 19% of the field observations disagree with the genetic sex (18.9% in the check below). Keep this in mind if using this info. The sex in the Finnish (and likely in the Swedish) data is determined visually when gutting the fish, and far from all individuals are sexed (many NAs).

The kept variables are: country, year, site, origin, length, weight, age.sea, age.sm and sex. Spatial variables are added to the data from the tables created in Section 2.

Show code
# Share (%) of field sex determinations that disagree with the genetic sex,
# among the fish where both are recorded.
# NOTE(review): the comparison is case-sensitive and includes any code other
# than f/m, whereas the combined data set lower-cases the sex and discards
# other codes - confirm this does not inflate the percentage.
fra.sallaa %>%
  drop_na(`Genetic sex`, `Sex observed in the field`) %>%
  summarise(
    perc.incorr = 100 * sum(`Genetic sex` != `Sex observed in the field`) / n()
  )
drop_na: removed 68,383 rows (95%), 3,738 rows remaining
rename: renamed 2 variables (fs, gs)
filter: removed 3,031 rows (81%), 707 rows remaining
drop_na: removed 68,383 rows (95%), 3,738 rows remaining
summarise: now one row and one column, ungrouped
# A tibble: 1 × 1
  perc.incorr
        <dbl>
1        18.9
Show code
# Convert fork length (Lf) to total length (Lt) in the French data.
# V.T. models the relationship as Lt = a * Lf^b, fitted as a linear model on
# the log scale; Lt ~ Lf is almost linear but log(Lt) ~ log(Lf) gives a
# seemingly better fit. The two printed values are log(a) and b.
fra.sallaa %>%
  # total_length > 100 drops an obvious outlier (and rows without a total
  # length); rows without a fork length cannot be used either
  filter(total_length > 100, !is.na(fork_length)) %>%
  lm(log(total_length) ~ log(fork_length), data = .) %>%
  coef() %>%
  unname()
filter: removed 22,077 rows (31%), 50,044 rows remaining
drop_na: removed 49,373 rows (99%), 671 rows remaining
[1] 0.2892351 0.9623479
Show code
# Visual check of the fork-to-total length conversion: observations as
# points, fitted power curve in red. Looks good.
fra.sallaa %>%
  # an obvious outlier where Lf >> Lt is excluded
  filter(total_length > 100) %>%
  drop_na(total_length, fork_length) %>%
  ggplot(aes(x = fork_length, y = total_length)) +
  geom_point() +
  geom_line(
    aes(y = exp(0.2892351) * fork_length^0.9623479),
    colour = "red"
  ) +
  labs(title = "fork to total length fit length at age")
filter: removed 22,077 rows (31%), 50,044 rows remaining
drop_na: removed 49,373 rows (99%), 671 rows remaining

Show code
# There are three mark-recaptured individuals in the Swedish db Sötebasen
# when NA lengths and ages are removed. As they are so few: identify them and
# flag the earlier (shorter) measurement(s) for removal, keeping the one where
# the fish is biggest. They are all recaptured within the same year.
dup.markrec <- swe.sallaa %>%
  # remove NAs
  drop_na(length, age.sea, MärkeNr) %>%
  # find the mark-recaptured ones by counting rows by tag number
  mutate(n = n(), .by = MärkeNr) %>%
  # keep the mark-recaps, excluding entries that are not tags ("finclipped")
  filter(n > 1 & MärkeNr != "Fenklippt") %>%
  # Within each tag, rank the records from longest to shortest and flag all
  # but the longest one. Ties are broken by row order, so exactly one record
  # per tag is always kept, even with equal lengths or more than two captures.
  filter(row_number(desc(length)) > 1, .by = MärkeNr)
drop_na: removed 105,549 rows (>99%), 300 rows remaining
mutate: new variable 'n' (integer) with 3 unique values and 0% NA
filter: removed 294 rows (98%), 6 rows remaining
slice_min: removed 3 rows (50%), 3 rows remaining
Show code
# Reduce a site lookup table to one row per site, preferring a row that has
# coordinates. Without this, a site that is listed more than once in the
# lookup duplicates every fish from that site when the tables are joined.
# NOTE(review): when a site has several rows with coordinates the first one
# is kept - confirm that this is the intended position.
one.row.per.site <- function(lookup) {
  lookup %>%
    arrange(is.na(lat) | is.na(lon)) %>%
    distinct(site, .keep_all = TRUE)
}

# Combine the data
all.sallaa <- swe.sallaa %>%
  # remove the mark-recaps
  anti_join(dup.markrec) %>%
  # remove the lakes in the data
  filter(!site %in% c("Vättern", "Vänern")) %>% 
  # remove individuals without length
  drop_na(length) %>%
  dplyr::select("country", "year", "site", "origin", "length", "weight", "AU", "age.sea", "age.sm", "sex") %>%
  bind_rows(fin.sallaa %>% 
              # remove individuals without length
              drop_na(length) %>% 
              dplyr::select("country", "year", "site", "origin", "length", "weight", "age.sea", "age.sm", "sex")) %>%
  bind_rows(fin.sallaa2 %>%
              # remove individuals without length
              drop_na(length) %>% 
              dplyr::select("country", "year", "site", "origin", "length", "weight", "age.sea", "age.sm","sex")) %>%
  # add coordinates, assessment/stock unit and region (one row per site)
  left_join(one.row.per.site(SweFin.rivers), by = "site") %>%
  # prefer the existing AU before the new one.
  mutate(asses.unit = if_else(is.na(AU), asses.unit, AU)) %>% 
  dplyr::select(!AU) %>%
  bind_rows(fra.sallaa %>% 
              rename(gen.sex = `Genetic sex`,
                     field.sex = `Sex observed in the field`) %>%
              mutate(# remove french accents and hyphens
                     site = stringi::stri_trans_general(site, "Latin-ASCII"),
                     site = str_replace_all(site,"-"," "),
                     # changing tributaries to main site 
                     site = case_when(site %in% c("Varenne","Bethune") ~ "Arques", 
                                       site %in% c("Inam") ~ "Elle",
                                       site %in% c("Austreberthe") ~ "Seine",
                                       site %in% c("Arroux", "Allier") ~ "Loire",
                                       site %in% c("Jet","Steir") ~ "Odet",
                                       .default = site),
                     # Using observed sex in the field when genetic is missing to complete info
                     sex = str_to_lower(if_else(is.na(gen.sex), field.sex, gen.sex)), 
                     # correct a "1" valued entry to NA
                     sex = if_else(sex %in% c("f","m"), sex, NA),
                     # calculate total from fork length and correct TL outlier: 51 mm and 2 yo
                     length = if_else(is.na(total_length) | total_length == 51, exp(0.2892351)*fork_length^0.9623479, total_length), 
                     # Correct one ind. at 7900 mm and assume it is 790 mm
                     length = if_else(length > 2000, length/10, length),
                     ) %>% 
              drop_na(length) %>%
              dplyr::select("country", "year", "site", "origin", "length", "age.sea", "age.sm", "sex") %>%
              # no weights in the French length at age data
              mutate(weight = NA) %>%
              # add region, stock unit and coordinates (one row per site)
              left_join(one.row.per.site(fra.rivers2), by = "site"))
Joining with `by = join_by(InsamlingID, Serie, InsamlMetod, AnstrTyp,
IndividID, site, RT90_X_Vatten, RT90_Y_Vatten, S99TM_N_Vatten, S99TM_E_Vatten,
WGS84_N_Vatten, WGS84_E_Vatten, Plats, RT90_X_Plats, RT90_Y_Plats,
S99TM_N_Plats, S99TM_E_Plats, WGS84_N_Plats, WGS84_E_Plats, Subdiv, AU, Syfte,
year, FångstDatum, Art, Åldersprov, IndividNr, length, weight, Behandling, Kön,
stage, Genprov, Märkning, MärkeNr, Märkning2, Märke2Nr, origin, age.sm,
age.sea, Pluszon, AntalLek, ÅlderLek1, ÅlderLek2, ÅlderLek3, Tydlighet,
AnmÅlder, country, sex)`
anti_join: added no columns
> rows only in x 105,846
> rows only in dup.markrec ( 0)
> matched rows ( 3)
> =========
> rows total 105,846
filter: removed 36,152 rows (34%), 69,694 rows remaining
drop_na: removed 3,682 rows (5%), 66,012 rows remaining
drop_na: removed 254 rows (18%), 1,125 rows remaining
drop_na: removed 771 rows (5%), 13,908 rows remaining
left_join: added 6 columns (lat, lon, asses.unit, stock.origin, stock.unit, …)
> rows only in x 0
> rows only in SweFin.rivers ( 4)
> matched rows 81,051 (includes duplicates)
> ========
> rows total 81,051
mutate: changed 1,215 values (1%) of 'asses.unit' (1,140 fewer NAs)
rename: renamed 2 variables (field.sex, gen.sex)
mutate: changed 14,902 values (21%) of 'site' (0 new NAs)
new variable 'sex' (character) with 3 unique values and 42% NA
new variable 'length' (double) with 741 unique values and 0% NA
drop_na: no rows removed
mutate: new variable 'weight' (logical) with one unique value and 100% NA
Joining with `by = join_by(site)`
left_join: added 5 columns (river, stock.unit, region.gen, lon, lat)
> rows only in x 0
> rows only in fra.rivers2 ( 12)
> matched rows 79,187 (includes duplicates)
> ========
> rows total 79,187
Show code
# Inspect the structure (column names and types) of the combined data
str(all.sallaa)
tibble [160,238 × 17] (S3: tbl_df/tbl/data.frame)
 $ country     : chr [1:160238] "SWE" "SWE" "SWE" "SWE" ...
 $ year        : num [1:160238] 2015 2015 2015 2015 2015 ...
 $ site        : chr [1:160238] "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" ...
 $ origin      : chr [1:160238] "wild" "wild" "wild" "reared" ...
 $ length      : num [1:160238] 990 840 860 880 860 1000 860 820 830 840 ...
 $ weight      : num [1:160238] 930000 540000 590000 440000 500000 830000 550000 510000 530000 550000 ...
 $ age.sea     : num [1:160238] 4 2 2 3 3 3 2 2 3 2 ...
 $ age.sm      : num [1:160238] 3 3 2 NA 4 3 3 3 2 3 ...
 $ sex         : chr [1:160238] "m" "f" "f" "f" ...
 $ lat         : num [1:160238] 58.5 58.5 58.5 58.5 58.5 ...
 $ lon         : num [1:160238] 19.8 19.8 19.8 19.8 19.8 ...
 $ asses.unit  : num [1:160238] NA NA NA NA NA NA NA NA NA NA ...
 $ stock.origin: chr [1:160238] NA NA NA NA ...
 $ stock.unit  : chr [1:160238] NA NA NA NA ...
 $ region      : chr [1:160238] "Baltic.sea" "Baltic.sea" "Baltic.sea" "Baltic.sea" ...
 $ river       : chr [1:160238] NA NA NA NA ...
 $ region.gen  : chr [1:160238] NA NA NA NA ...

3b. Create spatial units

Show code
# Spatial unit used for aggregation: the Swedish west coast as a whole, the
# Baltic split by assessment unit ("Baltic.sea:AU-<unit>", with "NA" as the
# unit when it is unknown), and the genetic region for everything else
# (the French data).
all.sallaa <- all.sallaa %>%
  mutate(
    spat.unit = case_when(
      region == "Swedish.westcoast" ~ region,
      region == "Baltic.sea" ~ paste0(region, ":AU-", asses.unit),
      .default = region.gen
    )
  )
mutate: new variable 'spat.unit' (character) with 11 unique values and 0% NA

3c. Correct age types

Show code
# Create life stage (age.type) and total age (age.tot) columns.
# The total age is computed column-wise instead of row by row, which gives
# the same values without the cost of rowwise() on the full data set.
all.sallaa2 <- all.sallaa %>%
  mutate(
    # which of the two ages is informative for the individual
    age.type = case_when(is.na(age.sea) & is.na(age.sm) ~ NA,
                         age.sea == 0 | is.na(age.sea) ~ "smolt.only",
                         age.sea > 0 & age.sm > 0 ~ "both",
                         age.sm == 0 | is.na(age.sm) ~ "sea.only",
                         .default = NA),
    # sum of sea and smolt age, counting a missing age as 0 ...
    age.tot = coalesce(age.sea, 0) + coalesce(age.sm, 0),
    # ... where a total of 0 (e.g. both ages missing) should be NA
    age.tot = na_if(age.tot, 0)
  )
mutate: new variable 'age.type' (character) with 4 unique values and 21% NA
        new variable 'age.tot' (double) with 13 unique values and 21% NA
ungroup: no grouping variables remain
Show code
# Fix ages that are not correctly classified (see 2-2_Salmon_data_v2):
# individuals typed "smolt.only" that are at least 290 mm long are treated as
# having their age recorded in the wrong column.
all.sallaa3 <- all.sallaa2 %>%
  mutate(
    # temporary flag for the misclassified individuals (never NA)
    .large.smolt.only = coalesce(age.type == "smolt.only" & length >= 290, FALSE),
    # set sea age = smolt age
    age.sea = if_else(.large.smolt.only, age.sm, age.sea),
    # set smolt age = 0 (a missing smolt age stays missing)
    age.sm = if_else(.large.smolt.only & !is.na(age.sm), 0, age.sm),
    # reclassify their age types
    age.type = if_else(.large.smolt.only, "sea.only", age.type),
    # drop the temporary flag again
    .large.smolt.only = NULL
  )
mutate: changed 153 values (<1%) of 'age.sea' (98 fewer NAs)
        changed 127 values (<1%) of 'age.sm' (0 new NAs)
        changed 154 values (<1%) of 'age.type' (0 new NAs)
Show code
# Check.
# Names of the sites that still have individuals typed as "smolt.only"
sws <- all.sallaa3 %>%
  filter(age.type %in% "smolt.only") %>%
  pull(site) %>%
  unique()
filter: removed 152,778 rows (95%), 7,460 rows remaining
distinct: removed 7,446 rows (>99%), 14 rows remaining
Show code
# Two panels combined side by side with patchwork (`+` between two plots):
# left - length against total age, one facet per age type;
# right - the same for the sites that have smolt-only individuals, one facet
# per site.
(
  all.sallaa3 %>%
    drop_na(age.type) %>%
    ggplot(aes(x = age.tot, y = length, colour = age.type)) +
    geom_point() +
    facet_wrap(vars(age.type))
) + (
  all.sallaa3 %>%
    drop_na(age.type) %>%
    filter(site %in% sws) %>%
    ggplot(aes(x = age.tot, y = length, colour = age.type)) +
    geom_point() +
    facet_wrap(vars(site))
)
drop_na: removed 34,105 rows (21%), 126,133 rows remaining
drop_na: removed 34,105 rows (21%), 126,133 rows remaining
filter: removed 111,577 rows (88%), 14,556 rows remaining
Warning: Removed 67 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 59 rows containing missing values or values outside the scale range
(`geom_point()`).

3d. Correct weights

There are no weight measurements in the French length at age data.

Show code
# Length-weight scatter per country with free axes, to compare the weight
# units. Swedish weights are in milligrams*10, so they need to be divided by
# 100 to get grams.
all.sallaa3 %>%
  # two NA-aged Swedish fish with wrong length-weight relationships removed
  drop_na(age.tot) %>%
  ggplot(aes(x = length, y = weight, colour = country)) +
  geom_point() +
  facet_wrap(vars(country), scales = "free")
drop_na: removed 34,172 rows (21%), 126,066 rows remaining
Warning: Removed 81583 rows containing missing values or values outside the scale range
(`geom_point()`).

Show code
# Convert the Swedish weights to grams (divide by 100); the weights of the
# other countries are left unchanged
all.sallaa4 <- all.sallaa3 %>%
  mutate(
    weight = if_else(country == "SWE", weight / 100, weight)
  )
mutate: changed 59,460 values (37%) of 'weight' (0 new NAs)
Show code
# Length-weight scatter after the unit correction, all countries in one
# panel. There are still some odd length-weight pairs to fix if weight is
# used.
all.sallaa4 %>%
  drop_na(age.tot) %>%
  ggplot(aes(x = length, y = weight, colour = country)) +
  geom_point()
drop_na: removed 34,172 rows (21%), 126,066 rows remaining
Warning: Removed 81583 rows containing missing values or values outside the scale range
(`geom_point()`).

3e. Map and summary of length at age

Show code
# Length at sea age by spatial unit (spat.unit): French regions, the Swedish
# west coast and the Baltic assessment units. This results in 10 spatial
# units (plus an NA group which will disappear when the French region
# information is complete) which should represent genetic and ecological
# units. There are observations without assessment units in the Baltic, as
# the only spatial information for them is that they are from the Baltic as
# a whole.
all.sallaa4 %>%
  ggplot(aes(x = age.sea, y = length, colour = spat.unit)) +
  geom_point() +
  facet_wrap(vars(spat.unit)) +
  # make sure the age axis starts at 0
  expand_limits(x = 0)
Warning: Removed 41591 rows containing missing values or values outside the scale range
(`geom_point()`).

Show code
# Map of the length-at-age sampling sites on top of the base map
# plot_map_Euro. Only lon/lat are needed for the points, so no per-site
# summaries are computed here.
plot_map_Euro +
  geom_point(
    data = all.sallaa4 %>%
      drop_na(lon, lat) %>%
      # adds projected X and Y columns (UTM zone 33N, EPSG:32633)
      add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633),
    # X and Y are scaled by 1000 -- presumably km to m to match the units of
    # plot_map_Euro; TODO confirm against map-plot.R
    aes(X * 1000, Y * 1000),
    color = "#440154FF",
    size = 0.3,
    show.legend = TRUE
  ) +
  theme_sleek()
drop_na: no rows removed
mutate: new variable 'n.years' (integer) with 32 unique values and 0% NA
mutate: new variable 'count.year' (character) with 2 unique values and 0% NA

Show code
# Save the last plot (the length-at-age map above). The relative file name
# means the image is written to the current working directory.
ggsave("laa_map.png", scale = 0.8)
Saving 5.6 x 4 in image
Show code
# Inspect one row per distinct coordinate pair for sites with lat < 51 and
# lon < -3, ordered from south to north.
all.sallaa4 %>%
  drop_na(lon, lat) %>%
  distinct(lon, lat, .keep_all = TRUE) %>%
  filter(lat < 51 & lon < -3) %>%
  arrange(lat)
drop_na: no rows removed
distinct: removed 160,129 rows (>99%), 109 rows remaining
filter: removed 80 rows (73%), 29 rows remaining
# A tibble: 29 × 20
   country  year site      origin length weight age.sea age.sm sex     lat   lon
   <chr>   <dbl> <chr>     <chr>   <dbl>  <dbl>   <dbl>  <dbl> <chr> <dbl> <dbl>
 1 FRA      1989 Etel      <NA>     740      NA       2      2 f      47.7 -3.21
 2 FRA      2003 Scorff    <NA>     633.     NA       2      1 f      47.7 -3.35
 3 FRA      1987 Blavet    <NA>     770      NA       2      2 <NA>   47.7 -3.32
 4 FRA      1987 Aven      <NA>     715      NA       2      1 <NA>   47.8 -3.74
 5 FRA      1987 Odet      <NA>     750      NA       2      1 <NA>   47.9 -4.12
 6 FRA      1987 Odet      <NA>     750      NA       2      1 <NA>   47.9 -4.12
 7 FRA      1988 Isole     <NA>     740      NA       2      1 f      47.9 -3.55
 8 FRA      1988 St Laure… <NA>     590      NA       1      1 m      47.9 -3.95
 9 FRA      1987 Goyen     <NA>     800      NA       2      1 <NA>   48.0 -4.54
10 FRA      1987 Aulne     <NA>     780      NA       2      1 <NA>   48.3 -4.28
# ℹ 19 more rows
# ℹ 9 more variables: asses.unit <dbl>, stock.origin <chr>, stock.unit <chr>,
#   region <chr>, river <chr>, region.gen <chr>, spat.unit <chr>,
#   age.type <chr>, age.tot <dbl>
Show code
# Individual counts per year and site, FRANCE
all.sallaa4 %>%
  mutate(river1 = paste0(country, ":", site)) %>%
  summarise(count = n(), .by = c(year, country, river1)) %>%
  mutate(
    # bin the yearly counts; conditions are evaluated top-down, so each bin
    # only needs its lower bound
    count.ind = case_when(
      count > 50 ~ ">50",
      count > 30 ~ ">30",
      count > 10 ~ ">10",
      .default = "1 - 10"
    ),
    # order the bins by their counts so the legend runs from low to high
    count.ind = fct_reorder(as.factor(count.ind), count)
  ) %>%
  filter(country == "FRA") %>%
  ggplot(aes(year, river1, fill = count.ind, group = country)) +
  geom_tile(color = "gray30") +
  scale_fill_viridis_d() +
  theme_light() +
  # many rivers on the y axis: shrink the axis labels. The theme element is
  # `axis.text`; `axis_text` is not a valid element and is ignored.
  theme(axis.text = element_text(size = 5))
mutate: new variable 'river1' (character) with 107 unique values and 0% NA
summarise: now 1,592 rows and 4 columns, ungrouped
mutate: new variable 'count.ind' (factor) with 4 unique values and 0% NA
filter: removed 310 rows (19%), 1,282 rows remaining
Warning in plot_theme(plot): The `axis_text` theme element is not defined in
the element hierarchy.

Show code
# Individual counts per year and site, FINLAND and SWEDEN
all.sallaa4 %>%
  mutate(river1 = paste0(country, ":", site)) %>%
  summarise(count = n(), .by = c(year, country, river1)) %>%
  mutate(
    # bin the yearly counts; conditions are evaluated top-down
    count.ind = case_when(
      count > 50 ~ ">50",
      count > 30 ~ ">30",
      count > 10 ~ ">10",
      .default = "1 - 10"
    ),
    # order the bins by their counts
    count.ind = fct_reorder(as.factor(count.ind), count)
  ) %>%
  filter(country != "FRA") %>%
  ggplot(aes(x = year, y = river1, fill = count.ind, group = country)) +
  geom_tile(color = "gray30") +
  scale_fill_viridis_d() +
  theme_light()
mutate: new variable 'river1' (character) with 107 unique values and 0% NA
summarise: now 1,592 rows and 4 columns, ungrouped
mutate: new variable 'count.ind' (factor) with 4 unique values and 0% NA
filter: removed 1,282 rows (81%), 310 rows remaining
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_tile()`).

By suggested spatial aggregation:

Show code
# Individual counts per year for the suggested spatial aggregation
all.sallaa4 %>%
  summarise(count = n(), .by = c(year, spat.unit)) %>%
  mutate(
    # bin the yearly counts; conditions are evaluated top-down
    Ind.count = case_when(
      count > 50 ~ ">50",
      count > 30 ~ ">30",
      count > 10 ~ ">10",
      .default = "1 - 10"
    ),
    # order the bins by their counts
    Ind.count = fct_reorder(as.factor(Ind.count), count)
  ) %>%
  ggplot(aes(x = year, y = spat.unit, fill = Ind.count)) +
  geom_tile(color = "gray30") +
  scale_fill_viridis_d() +
  labs(y = "") +
  theme_light()
summarise: now 390 rows and 3 columns, ungrouped
mutate: new variable 'Ind.count' (factor) with 4 unique values and 0% NA
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_tile()`).

Show code
# Save the last plot (yearly counts by spatial unit). The relative file name
# means the image is written to the current working directory.
ggsave("laa_temp.png", scale = 0.8)
Saving 5.6 x 4 in image
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_tile()`).
Show code
# Number of individuals per sex, stacked by suggested spatial region
all.sallaa4 %>%
  summarise(ind.count = n(), .by = c(spat.unit, sex)) %>%
  ggplot(aes(x = ind.count, y = sex, fill = spat.unit)) +
  # geom_col() draws the pre-computed counts as stacked bars
  geom_col(position = "stack") +
  scale_fill_viridis_d()
summarise: now 33 rows and 3 columns, ungrouped

Show code
# Number of individuals by suggested spatial region.
# Counts are summarised per spatial unit only (not per year), so each bar
# gets exactly one label showing its total.
all.sallaa4 %>%
  summarise(ind.count = n(), .by = spat.unit) %>%
  ggplot(aes(ind.count, spat.unit)) +
  geom_col(fill = "#440154FF") +
  # nudge the label to the right of the bar end
  geom_text(aes(label = ind.count), nudge_x = 2400)
summarise: now 390 rows and 3 columns, ungrouped

4. Fecundity at length

4a. Filter, clean and combine data

  • Remove NA values in length and fecundity (n.eggs)
  • Correct an obvious outlier in the Finnish data 1
  • Calculate total length (\(L_t\)) from fork length (\(L_f\)) where needed in the French data based on the model: \(L_t = \exp(0.05704206) \cdot L_f^{0.99670332}\) (see below).

The kept variables are: length, weight, site, country, origin, year, n.eggs. Spatial variables are added to the data from the tables created in 2.

Show code
# Log-log linear model used to convert French fork lengths in the fecundity
# data to total lengths. Prints the two coefficient estimates
# (intercept, slope) on the log scale.
fra.salfec %>%
  # fit only on fish with both length measurements
  drop_na(length.t, length.f) %>%
  lm(formula = log(length.t) ~ log(length.f), data = .) %>%
  tidy() %>%
  pull(estimate)
drop_na: removed 226 rows (45%), 276 rows remaining
[1] 0.05704206 0.99670332
Show code
# Visual check of the fork-to-total length conversion: observed pairs as
# points, the fitted power curve (coefficients from the model above) in red.
fra.salfec %>%
  drop_na(length.t, length.f) %>%
  ggplot(aes(x = length.f, y = length.t)) +
  geom_point() +
  # x is inherited from the plot; only y is replaced by the fitted values
  geom_line(aes(y = exp(0.05704206) * length.f^0.99670332), col = "red") +
  labs(title = "fork to total length fit fecundity")
drop_na: removed 226 rows (45%), 276 rows remaining

Show code
# Combine the Swedish, Finnish and French fecundity data.
# Every source is reduced to the same seven columns and to rows with both
# length and n.eggs before binding. Spatial information is then joined by
# site: SweFin.rivers for Sweden/Finland, fra.rivers2 for France.
all.salfec <- swe.salfec %>%
  drop_na(length, n.eggs) %>%
  dplyr::select("length", "weight", "site", "country", "origin", "year", "n.eggs") %>%
  bind_rows(swe.salfec2 %>%
              drop_na(length, n.eggs) %>%
              dplyr::select("length", "weight", "site", "country", "origin", "year", "n.eggs")) %>%
  bind_rows(fin.salfec1 %>%
              drop_na(length, n.eggs) %>%
              dplyr::select("length", "weight", "site", "country", "origin", "year", "n.eggs")) %>%
  bind_rows(fin.salfec2 %>%
              drop_na(length, n.eggs) %>%
              # correct an obvious error (cm and not mm)
              mutate(length = if_else(length == 85, 850, length),
                     # no weights in this data set; use a numeric NA so the
                     # column type matches the other sources
                     weight = NA_real_) %>%
              dplyr::select("length", "weight", "site", "country", "origin", "year", "n.eggs")) %>%
  # explicit key instead of relying on the natural join
  left_join(SweFin.rivers, by = "site") %>%
  bind_rows(fra.salfec %>%
              # use the measured total length where available, otherwise
              # convert fork length with the fitted log-log model
              mutate(length = if_else(is.na(length.t), exp(0.05704206)*length.f^0.99670332, length.t)) %>%
              drop_na(length, n.eggs) %>%
              dplyr::select("length", "weight", "site", "country", "origin", "year", "n.eggs") %>%
              # fra.rivers2 has more than one row for some sites, which would
              # duplicate fecundity observations in the join. Keep one row per
              # site so every fish appears exactly once.
              # NOTE(review): the first row per site is kept; confirm that the
              # duplicated sites carry the same river/region/coordinates.
              left_join(fra.rivers2 %>% distinct(site, .keep_all = TRUE),
                        by = "site"))
drop_na: removed 90 rows (12%), 676 rows remaining
drop_na: no rows removed
drop_na: removed 2 rows (1%), 191 rows remaining
drop_na: removed 16 rows (29%), 39 rows remaining
mutate: changed one value (3%) of 'length' (0 new NAs)
        new variable 'weight' (logical) with one unique value and 100% NA
Joining with `by = join_by(site)`
left_join: added 6 columns (lat, lon, asses.unit, stock.origin, stock.unit, …)
           > rows only in x                0
           > rows only in SweFin.rivers ( 51)
           > matched rows                926
           >                            =====
           > rows total                  926
mutate: new variable 'length' (double) with 260 unique values and 0% NA
drop_na: removed 2 rows (<1%), 500 rows remaining
Joining with `by = join_by(site)`
left_join: added 5 columns (river, stock.unit, region.gen, lon, lat)
           > rows only in x              7
           > rows only in fra.rivers2 ( 55)
           > matched rows              535    (includes duplicates)
           >                          =====
           > rows total                542
Show code
# Check dimensions and column types of the combined fecundity data
str(all.salfec)
tibble [1,468 × 15] (S3: tbl_df/tbl/data.frame)
 $ length      : num [1:1468] 840 830 710 940 870 860 870 840 960 820 ...
 $ weight      : num [1:1468] 5200 5200 3700 8800 6300 6200 6900 6200 10300 5300 ...
 $ site        : chr [1:1468] "Umeälven" "Umeälven" "Umeälven" "Umeälven" ...
 $ country     : chr [1:1468] "SWE" "SWE" "SWE" "SWE" ...
 $ origin      : chr [1:1468] "reared" "reared" "reared" "reared" ...
 $ year        : num [1:1468] 2005 2005 2005 2005 2005 ...
 $ n.eggs      : num [1:1468] 9338 6433 6779 11208 9418 ...
 $ lat         : num [1:1468] 63.7 63.7 63.7 63.7 63.7 ...
 $ lon         : num [1:1468] 20.3 20.3 20.3 20.3 20.3 ...
 $ asses.unit  : num [1:1468] 2 2 2 2 2 2 2 2 2 2 ...
 $ stock.origin: chr [1:1468] "wild" "wild" "wild" "wild" ...
 $ stock.unit  : chr [1:1468] NA NA NA NA ...
 $ region      : chr [1:1468] "Baltic.sea" "Baltic.sea" "Baltic.sea" "Baltic.sea" ...
 $ river       : chr [1:1468] NA NA NA NA ...
 $ region.gen  : chr [1:1468] NA NA NA NA ...

4b. Create spatial units

Show code
# Add the spatial aggregation unit (spat.unit) to the fecundity data:
#   - Baltic Sea sites: region combined with the assessment unit
#   - Swedish west coast: the region itself
#   - everything else: region.gen
all.salfec <- mutate(
  all.salfec,
  spat.unit = case_when(
    region == "Baltic.sea" ~ paste0(region, ":AU-", asses.unit),
    region == "Swedish.westcoast" ~ region,
    .default = region.gen
  )
)
mutate: new variable 'spat.unit' (character) with 9 unique values and <1% NA

4c. Map and temporal summary of fecundity

Seven French individuals from 1974 and 1975 lack site information.

Show code
# Fecundity at length, all data, coloured by spatial unit
all.salfec %>%
  ggplot(aes(length, n.eggs, color = spat.unit)) +
  geom_point() +
  # the points map spat.unit to `color`, so the viridis palette has to be set
  # on the colour scale (a fill scale would have no effect here)
  scale_color_viridis_d() +
  theme_light()

Show code
# Map of the fecundity sampling sites on top of the base map plot_map_Euro
plot_map_Euro +
  geom_point(
    data = all.salfec %>%
      # rows without coordinates cannot be projected
      drop_na(lat, lon) %>%
      add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633),
    mapping = aes(X * 1000, Y * 1000),
    size = 0.3,
    color = "#440154FF"
  )
drop_na: removed 7 rows (<1%), 1,461 rows remaining

Show code
# Save the last plot (the fecundity map above). The relative file name means
# the image is written to the current working directory.
ggsave("fec_map.png", scale = 0.8)
Saving 5.6 x 4 in image
Show code
# Fecundity sample sizes per year and site
all.salfec %>%
  summarise(count = n(), .by = c(year, country, origin, site)) %>%
  mutate(
    # bin the counts; conditions are evaluated top-down
    Ind.count = case_when(
      count > 50 ~ ">50",
      count > 30 ~ ">30",
      count > 10 ~ ">10",
      .default = "1 - 10"
    ),
    # order the bins by their counts
    Ind.count = fct_reorder(as.factor(Ind.count), count)
  ) %>%
  ggplot(aes(x = year, y = site, fill = Ind.count)) +
  geom_tile(color = "gray30") +
  scale_fill_viridis_d() +
  theme_light()
summarise: now 96 rows and 5 columns, ungrouped
mutate: new variable 'Ind.count' (factor) with 4 unique values and 0% NA

Show code
# Fecundity sample sizes per year and spatial aggregation unit
all.salfec %>%
  summarise(count = n(), .by = c(year, country, origin, spat.unit)) %>%
  mutate(
    # bin the counts; conditions are evaluated top-down
    Ind.count = case_when(
      count > 50 ~ ">50",
      count > 30 ~ ">30",
      count > 10 ~ ">10",
      .default = "1 - 10"
    ),
    # order the bins by their counts
    Ind.count = fct_reorder(as.factor(Ind.count), count)
  ) %>%
  ggplot(aes(x = year, y = spat.unit, fill = Ind.count)) +
  geom_tile(color = "gray30") +
  scale_fill_viridis_d() +
  theme_light()
summarise: now 72 rows and 5 columns, ungrouped
mutate: new variable 'Ind.count' (factor) with 4 unique values and 0% NA

Show code
# Number of individuals with fecundity data per spatial unit
all.salfec %>%
  # rows without site information are left out of the counts
  drop_na(site) %>%
  summarise(ind.count = n(), .by = spat.unit) %>%
  ggplot(aes(x = ind.count, y = spat.unit)) +
  geom_col(position = "stack", fill = "#440154FF") +
  # print the count just to the right of each bar end
  geom_text(aes(label = ind.count), nudge_x = 20)
drop_na: removed 7 rows (<1%), 1,461 rows remaining
summarise: now 8 rows and 2 columns, ungrouped

5. Build data sets and aggregate by spatial aggregation

Show code
# Final objects to be saved. Both are plain copies of the cleaned data; no
# further aggregation is applied in this chunk.

# length at age
salmon.laa <- all.sallaa4

# fecundity
salmon.fec <- all.salfec 

# check dimensions and column types of the fecundity data
str(salmon.fec)
tibble [1,468 × 16] (S3: tbl_df/tbl/data.frame)
 $ length      : num [1:1468] 840 830 710 940 870 860 870 840 960 820 ...
 $ weight      : num [1:1468] 5200 5200 3700 8800 6300 6200 6900 6200 10300 5300 ...
 $ site        : chr [1:1468] "Umeälven" "Umeälven" "Umeälven" "Umeälven" ...
 $ country     : chr [1:1468] "SWE" "SWE" "SWE" "SWE" ...
 $ origin      : chr [1:1468] "reared" "reared" "reared" "reared" ...
 $ year        : num [1:1468] 2005 2005 2005 2005 2005 ...
 $ n.eggs      : num [1:1468] 9338 6433 6779 11208 9418 ...
 $ lat         : num [1:1468] 63.7 63.7 63.7 63.7 63.7 ...
 $ lon         : num [1:1468] 20.3 20.3 20.3 20.3 20.3 ...
 $ asses.unit  : num [1:1468] 2 2 2 2 2 2 2 2 2 2 ...
 $ stock.origin: chr [1:1468] "wild" "wild" "wild" "wild" ...
 $ stock.unit  : chr [1:1468] NA NA NA NA ...
 $ region      : chr [1:1468] "Baltic.sea" "Baltic.sea" "Baltic.sea" "Baltic.sea" ...
 $ river       : chr [1:1468] NA NA NA NA ...
 $ region.gen  : chr [1:1468] NA NA NA NA ...
 $ spat.unit   : chr [1:1468] "Baltic.sea:AU-2" "Baltic.sea:AU-2" "Baltic.sea:AU-2" "Baltic.sea:AU-2" ...
Show code
# Check dimensions and column types of the length-at-age data
str(salmon.laa)
tibble [160,238 × 20] (S3: tbl_df/tbl/data.frame)
 $ country     : chr [1:160238] "SWE" "SWE" "SWE" "SWE" ...
 $ year        : num [1:160238] 2015 2015 2015 2015 2015 ...
 $ site        : chr [1:160238] "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" ...
 $ origin      : chr [1:160238] "wild" "wild" "wild" "reared" ...
 $ length      : num [1:160238] 990 840 860 880 860 1000 860 820 830 840 ...
 $ weight      : num [1:160238] 9300 5400 5900 4400 5000 8300 5500 5100 5300 5500 ...
 $ age.sea     : num [1:160238] 4 2 2 3 3 3 2 2 3 2 ...
 $ age.sm      : num [1:160238] 3 3 2 NA 4 3 3 3 2 3 ...
 $ sex         : chr [1:160238] "m" "f" "f" "f" ...
 $ lat         : num [1:160238] 58.5 58.5 58.5 58.5 58.5 ...
 $ lon         : num [1:160238] 19.8 19.8 19.8 19.8 19.8 ...
 $ asses.unit  : num [1:160238] NA NA NA NA NA NA NA NA NA NA ...
 $ stock.origin: chr [1:160238] NA NA NA NA ...
 $ stock.unit  : chr [1:160238] NA NA NA NA ...
 $ region      : chr [1:160238] "Baltic.sea" "Baltic.sea" "Baltic.sea" "Baltic.sea" ...
 $ river       : chr [1:160238] NA NA NA NA ...
 $ region.gen  : chr [1:160238] NA NA NA NA ...
 $ spat.unit   : chr [1:160238] "Baltic.sea:AU-NA" "Baltic.sea:AU-NA" "Baltic.sea:AU-NA" "Baltic.sea:AU-NA" ...
 $ age.type    : chr [1:160238] "both" "both" "both" "sea.only" ...
 $ age.tot     : num [1:160238] 7 5 4 3 7 6 5 5 5 5 ...
Show code
# salmon.laa %>%
#   distinct(year,spat.unit) %>%
#   count(spat.unit) %>%
#   mutate(m = median(n))
#     
# salmon.laa %>%
#   count(sex,country) %>%
#   mutate(x = n, .by = c(sex,country))
# 
# salmon.fec %>% 
#   distinct(spat.unit, year) %>%
#   count(spat.unit) %>%
# 
# salmon.laa %>%
#   summarise(n = n(), .by = c(country, spat.unit, ))

6. Save data

Show code
# Write both data sets as date-stamped files under <home>/data/data-for-2-2.
# NOTE(review): the files are written with saveRDS() but carry an ".RData"
# extension, so they must be read back with readRDS(), not load(). The
# extension is kept here so existing file names do not change.
saveRDS(
  salmon.laa,
  file = file.path(home, "data", "data-for-2-2",
                   paste0("salmon-laa_", Sys.Date(), ".RData"))
)
saveRDS(
  salmon.fec,
  file = file.path(home, "data", "data-for-2-2",
                   paste0("salmon-fec_", Sys.Date(), ".RData"))
)